Click Header to Expand(+)/Collapse(-)
!cp /content/drive/MyDrive/_Colab/P1_Facial_Keypoints/aux/content.zip .
!unzip content.zip
!mv content/* .
!cp /content/drive/MyDrive/_Colab/P1_Facial_Keypoints/*.py .
from google.colab import drive
drive.mount('/content/drive')
Using your trained facial keypoint detector, you can now do things like add filters to a person's face, automatically. In this optional notebook, you can play around with adding sunglasses to detected faces in an image by using the keypoints detected around a person's eyes. Check out the images/ directory to see what other .png's have been provided for you to try, too!
Let's start this process by looking at a sunglasses .png that we'll be working with!
# import necessary resources
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import cv2
# load in sunglasses image with cv2 and IMREAD_UNCHANGED
# IMREAD_UNCHANGED preserves the 4th (alpha/transparency) channel
sunglasses = cv2.imread('images/sunglasses.png', cv2.IMREAD_UNCHANGED)
# plot our image
# NOTE(review): cv2 loads channels in BGRA order while matplotlib expects
# RGB(A), so colors may appear swapped here — harmless for this inspection
plt.imshow(sunglasses)
# print out its dimensions
print('Image shape: ', sunglasses.shape)
You'll note that this image actually has 4 color channels, not just 3 as your avg RGB image does. This is due to the flag we set cv2.IMREAD_UNCHANGED, which tells this to read in another color channel.
It has the usual red, blue, and green channels any color image has, and the 4th channel represents the transparency level of each pixel in the image; this is often called the alpha channel. Here's how the transparency channel works: the lower the value, the more transparent, or see-through, the pixel will become. The lower bound (completely transparent) is zero here, so any pixels set to 0 will not be seen; these look like white background pixels in the image above, but they are actually totally transparent.
This transparent channel allows us to place this rectangular image of sunglasses on an image of a face and still see the face area that is technically covered by the transparent background of the sunglasses image!
Let's check out the alpha channel of our sunglasses image in the next Python cell. Because many of the pixels in the background of the image have an alpha value of 0, we'll need to explicitly print out non-zero values if we want to see them.
# print out the sunglasses transparency (alpha) channel
# channel index 3 is alpha: 0 = fully transparent, 255 = fully opaque
alpha_channel = sunglasses[:,:,3]
print ('The alpha channel looks like this (black pixels = transparent): ')
plt.imshow(alpha_channel, cmap='gray')
# just to double check that there are indeed non-zero values
# let's find and print out every value greater than zero
# NOTE: np.where here returns the (row, col) *indices* of the non-transparent
# pixels, not the alpha values themselves
values = np.where(alpha_channel != 0)
print ('The non-zero values of the alpha channel are: ')
print (values)
This means that when we place this sunglasses image on top of another image, we can use the transparency channel as a filter:
In doing this, it's helpful to understand which keypoint belongs to the eyes, mouth, etc., so in the image below we also print the index of each facial keypoint directly on the image so you can tell which keypoints are for the eyes, eyebrows, etc.,
It may be useful to use keypoints that correspond to the edges of the face to define the width of the sunglasses, and the locations of the eyes to define the placement.
Next, we'll load in an example image. Below, you've been given an image and set of keypoints from the provided training set of data, but you can use your own CNN model to generate keypoints for any image of a face (as in Notebook 3) and go through the same overlay process!
# load in training data
key_pts_frame = pd.read_csv('data/training_frames_keypoints.csv')
# print out some stats about the data
print('Number of images: ', key_pts_frame.shape[0])
# helper function to display keypoints
def show_keypoints(image, key_pts):
    """Show image with keypoints overlaid as magenta dots."""
    plt.imshow(image)
    plt.scatter(key_pts[:, 0], key_pts[:, 1], s=20, marker='.', c='m')
# a selected image
n = 120
image_name = key_pts_frame.iloc[n, 0]
image = mpimg.imread(os.path.join('data/training/', image_name))
# columns 1.. of the CSV hold interleaved x,y pairs; reshape to (num_pts, 2)
key_pts = key_pts_frame.iloc[n, 1:].values.astype('float').reshape(-1, 2)
print('Image name: ', image_name)
plt.figure(figsize=(5, 5))
show_keypoints(image, key_pts)
plt.show()
Next, you'll see an example of placing sunglasses on the person in the loaded image.
Note that the keypoints are numbered off-by-one in the numbered image above, and so key_pts[0,:] corresponds to the first point (1) in the labelled image.
# Display sunglasses on top of the image in the appropriate place
# copy of the face image for overlay
image_copy = np.copy(image)
# top-left location for sunglasses to go
# 17 = edge of left eyebrow
x = int(key_pts[17, 0])
y = int(key_pts[17, 1])
# height and width of sunglasses
# h = length of nose (vertical span from keypoint 27 down to 34)
h = int(abs(key_pts[27,1] - key_pts[34,1]))
# w = left to right eyebrow edges
w = int(abs(key_pts[17,0] - key_pts[26,0]))
# read in sunglasses (keep the alpha channel)
sunglasses = cv2.imread('images/sunglasses.png', cv2.IMREAD_UNCHANGED)
# resize sunglasses to the computed face dimensions
new_sunglasses = cv2.resize(sunglasses, (w, h), interpolation = cv2.INTER_CUBIC)
# get region of interest on the face to change
roi_color = image_copy[y:y+h,x:x+w]
# find all non-transparent pts (alpha > 0)
ind = np.argwhere(new_sunglasses[:,:,3] > 0)
# for each non-transparent point, replace the original image pixel with that of the new_sunglasses
for i in range(3): roi_color[ind[:,0],ind[:,1],i] = new_sunglasses[ind[:,0],ind[:,1],i]
# set the area of the image to the changed region with sunglasses
# (roi_color is a view into image_copy, so it is already updated; this
# assignment is a redundant safeguard)
image_copy[y:y+h,x:x+w] = roi_color
# display the result!
plt.imshow(image_copy)
Look in the images/ directory to see other available .png's for overlay! Also, you may notice that the overlay of the sunglasses is not entirely perfect; you're encouraged to play around with the scale of the width and height of the glasses and investigate how to perform image rotation in OpenCV so as to match an overlay with any facial pose.
# Using https://github.com/jrosebr1/imutils and https://www.pyimagesearch.com/2017/05/22/face-alignment-with-opencv-and-python/
# I saw that we can have a good idea of the angle to which display the filter by looking at the eyes
# compute the center of mass for each eye
def getAngleImage(shape):
    """Return the in-plane head-roll angle (degrees) estimated from the eyes.

    Args:
        shape: (68, 2) array of facial keypoints as (x, y) pixel coords.

    Returns:
        Angle in degrees of the line from the left-eye outer corner
        (keypoint 36) to the right-eye outer corner (keypoint 45), via
        np.arctan2 (positive when the right eye sits lower in image coords).
    """
    # outer eye corners in the 68-point landmark scheme
    # (the unused landmark-index table and dead commented-out mean-based
    # centroid code from the original have been removed)
    left_eye = shape[36, :]
    right_eye = shape[45, :]
    # compute the angle between the eye points
    dY = right_eye[1] - left_eye[1]
    dX = right_eye[0] - left_eye[0]
    return np.degrees(np.arctan2(dY, dX))
def getNumberByImageName(name):
    """Return the row index in key_pts_frame whose image filename equals `name`."""
    matches = key_pts_frame.iloc[:, 0].values == name
    return np.flatnonzero(matches)[0]
def plotFilterInImage(image, filterpath, key_pts, x, y, h, w):
    """Overlay a transparent PNG filter onto a face image, rotated to the head roll.

    Args:
        image: RGB face image, shape (H, W, 3).
        filterpath: path to an RGBA .png (alpha channel marks transparency).
        key_pts: (68, 2) facial keypoints, used to estimate the roll angle.
        x, y: top-left corner where the filter is anchored.
        h, w: target height and width for the resized filter.

    Returns:
        A copy of `image` with the filter composited over it.
    """
    image_copy = np.copy(image)
    imgh, imgw = image_copy.shape[0], image_copy.shape[1]
    # renamed from `filter` to avoid shadowing the builtin
    filter_img = cv2.imread(filterpath, cv2.IMREAD_UNCHANGED)
    new_filter = cv2.resize(filter_img, (w, h), interpolation = cv2.INTER_CUBIC)
    # pad the filter out to the full image size so it can be rotated in place
    # NOTE(review): assumes the filter rectangle fits inside the image
    # (x, y, x+w, y+h all in bounds); negative pads would raise in np.pad
    pad_left, pad_right = x, imgw - (x + w)
    pad_top, pad_bottom = y, imgh - (y + h)
    new_filter_pad = np.pad(new_filter,
                            ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)),
                            mode='constant', constant_values=0)
    # rotate the padded filter about its (x, y) anchor to match the head roll
    angle = getAngleImage(key_pts)
    M = cv2.getRotationMatrix2D((x, y), -angle, 1)
    new_filter_rot = cv2.warpAffine(new_filter_pad, M, (imgw, imgh))
    # composite: copy filter RGB wherever its alpha channel is non-transparent
    ind = np.argwhere(new_filter_rot[:, :, 3] > 0)
    for i in range(3):
        image_copy[ind[:, 0], ind[:, 1], i] = new_filter_rot[ind[:, 0], ind[:, 1], i]
    return image_copy
# helper function to display keypoints annotated with their indices
def show_keypoints(image, key_pts):
    """Show image with keypoints; each point is labelled with its index."""
    fig = plt.figure(figsize=(15, 15))
    ax = fig.add_subplot(111)
    plt.imshow(image)
    xs, ys = key_pts[:, 0], key_pts[:, 1]
    ax.scatter(xs, ys, s=20, marker='.', c='m')
    for idx, (px, py) in enumerate(zip(xs, ys)):
        ax.annotate(idx, (px, py))
    plt.show()
# choose tilted head to test angle
n = getNumberByImageName('Carlos_Ghosn_10.jpg')
image_name = key_pts_frame.iloc[n, 0]
image = mpimg.imread(os.path.join('data/training/', image_name))
key_pts = key_pts_frame.iloc[n, 1:].values.astype('float').reshape(-1, 2)
show_keypoints(image, key_pts)
# reload the same tilted image and place the sunglasses
n = getNumberByImageName('Carlos_Ghosn_10.jpg')
image_name = key_pts_frame.iloc[n, 0]
image = mpimg.imread(os.path.join('data/training/', image_name))
key_pts = key_pts_frame.iloc[n, 1:].values.astype('float').reshape(-1, 2)
# landmark indices (68-point scheme) used to size/place the glasses
LFT_CORNER_FACE = 0
EYEBROWN = 18  # (sic) eyebrow point
TOP_NOSE_POINT = 29
TOP_EYEBROW = 19
LEFT_EYEBROW = 17
RIGHT_EYEBROW = 26
# x: midway between the face edge and the left eyebrow edge
x_glasses = int(np.mean([key_pts[LFT_CORNER_FACE, 0], key_pts[LEFT_EYEBROW, 0]]))
# y: average height of the two brow points
y_glasses = int(np.mean([key_pts[EYEBROWN, 1], key_pts[TOP_EYEBROW, 1]]))
# height: brow to mid-nose; width: eyebrow edge to eyebrow edge
h_glasses = int(abs(key_pts[TOP_NOSE_POINT,1] - key_pts[TOP_EYEBROW,1]))
w_glasses = int(abs(key_pts[LEFT_EYEBROW,0] - key_pts[RIGHT_EYEBROW,0]))
img_filtered = plotFilterInImage(image, 'images/sunglasses.png', key_pts, x_glasses, y_glasses, h_glasses, w_glasses)
plt.imshow(img_filtered)
# moustache filter on the same tilted image
n = getNumberByImageName('Carlos_Ghosn_10.jpg')
image_name = key_pts_frame.iloc[n, 0]
image = mpimg.imread(os.path.join('data/training/', image_name))
key_pts = key_pts_frame.iloc[n, 1:].values.astype('float').reshape(-1, 2)
# landmark indices used to size/place the moustache
LFT_CHEEK_FACE = 4
BOTTOM_NOSE = 33
TOP_LIP = 60
MIDDLE_NOSE = 29
RIGHT_CHEEK_FACE = 13
x_mustache = int(key_pts[LFT_CHEEK_FACE, 0] *0.8) # shift left by scaling x down to 80% of the cheek position
y_mustache = int(key_pts[BOTTOM_NOSE, 1])
# height: top lip to mid-nose; width: cheek to cheek
h_mustache = int(abs(key_pts[TOP_LIP,1] - key_pts[MIDDLE_NOSE,1]))
w_mustache = int(abs(key_pts[LFT_CHEEK_FACE,0] - key_pts[RIGHT_CHEEK_FACE,0]))
img_filtered = plotFilterInImage(image, 'images/moustache.png', key_pts, x_mustache, y_mustache, h_mustache, w_mustache)
plt.imshow(img_filtered)
# straw-hat filter on the same tilted image
n = getNumberByImageName('Carlos_Ghosn_10.jpg')
image_name = key_pts_frame.iloc[n, 0]
image = mpimg.imread(os.path.join('data/training/', image_name))
key_pts = key_pts_frame.iloc[n, 1:].values.astype('float').reshape(-1, 2)
RIGHT_CHEEK_FACE = 13
LFT_CORNER_FACE = 0
x_hat = int(key_pts[LFT_CORNER_FACE, 0] * 0.3) # hat a bit out of the face
y_hat = 0 # start the hat at top of image
h_hat = int(min(key_pts[:,1]) * 1.1) # highest point of the face (on the forehead), plus 10%
w_hat = int(abs(key_pts[LFT_CORNER_FACE,0] - key_pts[RIGHT_CHEEK_FACE,0]) * 1.4) # hat 40% wider than the face
img_filtered = plotFilterInImage(image, 'images/straw_hat.png', key_pts, x_hat, y_hat, h_hat, w_hat)
plt.imshow(img_filtered)
def getMouthCurvature(n):
    """Fit a parabola to the mouth keypoints of training image `n`.

    Returns:
        image: RGB image; mouth: raw mouth keypoints;
        x, y: fitted-curve sample points (sorted by x);
        x_rotated, y_rotated: the same points rotated by -roll about the
        first point, undoing head tilt before the smile classification.
    """
    from scipy.optimize import curve_fit
    from math import pi, cos, sin
    # parabola model for the mouth line
    def func1(x, a, b, c):
        return a*x**2+b*x+c
    image = cv2.imread(os.path.join('data/training/', key_pts_frame.iloc[n, 0]))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    key_pts = key_pts_frame.iloc[n, 1:].values.astype('float').reshape(-1, 2)
    # NOTE(review): 54:60 covers only part of the outer lip in the 68-point
    # scheme (48-59 is the full outer lip) — confirm this subset is intended
    mouth = key_pts[54:60]
    angle = getAngleImage(key_pts)
    # get points from mouth sorted by X (y values re-ordered to match)
    x, y = sorted(mouth[:, 0]), [a for _, a in sorted(zip(mouth[:, 0], mouth[:, 1]))]
    # find a curve that best describes the mouth
    popt, pcov = curve_fit(func1, x, y)
    a, b, c = popt[0], popt[1], popt[2]
    # resample the fitted curve at the original x positions (as ints)
    y = np.array([int(func1(x1, a, b, c)) for x1 in x ])
    x = np.array(x)
    # Rotate the fitted points by -angle about the first point to undo roll.
    # Formula found in:
    # http://danceswithcode.net/engineeringnotes/rotations_in_2d/rotations_in_2d.html
    # x1 = (x0 - xc)cos(theta) - (y0 - yc)sin(theta) + xc
    # y1 = (x0 - xc)sin(theta) + (y0 - yc)cos(theta) + yc
    x_rotated = (x - x[0]) * cos(-angle * pi / 180) - (y - y[0]) * sin(-angle * pi / 180) + x[0]
    y_rotated = (x - x[0]) * sin(-angle * pi / 180) + (y - y[0]) * cos(-angle * pi / 180) + y[0]
    x_rotated = list(map(int, x_rotated))
    y_rotated = list(map(int, y_rotated))
    return image, mouth, x, y, x_rotated, y_rotated
# 4 possible outcomes: Big Smile | Smiling | Neutral | Sad
def getSmile(x, y):
    """Classify a mouth curve into Big Smile / Smile / Neutral / Sad.

    Args:
        x, y: roll-corrected mouth-curve points sorted by x (image coords,
            so a larger y is lower in the image).

    Returns:
        (label, score): label is an emoji-tagged string; score is the signed
        pixel difference driving the decision (more negative = stronger
        smile, positive = sad).
    """
    BIGSMILE, SMILE = "BigSmile: \U0001F600", "Smile: \U0001F642"
    NEUTRAL, SAD = "Neutral: \U0001F611", "Sad: \U0001F610"
    threshold_smile, bigsmilethreshold = 4, 6 #pixels
    # mouth corners (the curve is sorted by x, so these are the extremes)
    # (dead `centerpoint` computation from the original removed)
    leftmost, rightmost = (x[0], y[0]), (x[-1], y[-1])
    # get the lowest point of the curve (largest y = lowest in the image)
    i_lower = np.argwhere(np.array(y) == max(y)).ravel()[0]
    # get the highest point of the curve
    i_higher = np.argwhere(np.array(y) == min(y)).ravel()[0]
    upperpoint, lowerpoint = (x[i_higher], y[i_higher]), (x[i_lower], y[i_lower])
    # distance from the curve's highest point up past the lower mouth corner
    highest_center = abs(upperpoint[1] - max(rightmost[1], leftmost[1]))
    # distance from the curve's lowest point down past the higher mouth corner
    lowest_center = abs(lowerpoint[1] - min(rightmost[1], leftmost[1]))
    diff_highest_lowest_from_center = highest_center - lowest_center
    # positive: the curve rises above the corners more than it dips -> sad
    if diff_highest_lowest_from_center > 0:
        return SAD, diff_highest_lowest_from_center
    # small dip below the corners -> neutral
    if diff_highest_lowest_from_center > -threshold_smile:
        return NEUTRAL, diff_highest_lowest_from_center
    # moderate dip -> smile
    if -bigsmilethreshold < diff_highest_lowest_from_center <= -threshold_smile:
        return SMILE, diff_highest_lowest_from_center
    # deep dip -> big smile
    return BIGSMILE, diff_highest_lowest_from_center
# Example indices in the training set:
# 1585 - smile | 3372 - big smile | 1979 - Neutral | 339 - Sad
def _show_mouth_analysis(n):
    """Run the mouth-curvature pipeline on training image `n`, print the
    detected expression, and plot the raw mouth points (blue), the fitted
    curve (green), and the roll-corrected curve (magenta) over the image."""
    image, mouth, x, y, x_rotated, y_rotated = getMouthCurvature(n)
    emoj, res = getSmile(x_rotated, y_rotated)
    print(emoj)
    fig = plt.figure(figsize=(15, 15))
    ax = fig.add_subplot(111)
    ax.scatter(mouth[:,0], mouth[:,1],c='blue')
    ax.plot(x,y, linewidth=2.0, c='g')
    ax.plot(x_rotated,y_rotated, linewidth=3.0 ,c='m')
    plt.imshow(image)
    plt.show()

# the four example cells were identical apart from the index — deduplicated
_show_mouth_analysis(1585)  # smile
_show_mouth_analysis(3372)  # big smile
_show_mouth_analysis(1979)  # neutral
_show_mouth_analysis(339)   # sad
def faceMask(image, key_pts, e_sizeh=0, e_sizew=0):
    """Paint a filled green ellipse over the face region (used as a
    color-key mask for face swapping).

    Args:
        image: RGB face image.
        key_pts: (68, 2) facial keypoints; 29 = nose area, 0-16 = jawline.
        e_sizeh, e_sizew: explicit ellipse half-axes; if both are 0 the
            size is derived from the keypoints instead.

    Returns:
        (masked image, h, w, center) where h/w are the ellipse half-axes
        and center is the integer (x, y) nose position.
    """
    img_masked = np.copy(image)
    nose = key_pts[29,:]
    head = key_pts[0:16,:]
    # derive ellipse size from the face extent around the nose (10% margin)
    # NOTE(review): h mixes x-coordinates and w mixes y-coordinates; since
    # cv2.ellipse axes are (horizontal, vertical) the names look swapped —
    # the swap pipeline was tuned to this behavior, confirm before changing
    if e_sizeh == 0 == e_sizew: h, w = (max(head[:,0]) - nose [0]) * 1.1, (max(head[:,1]) - nose [1]) * 1.1
    else: h, w = e_sizeh, e_sizew
    center = (int(nose[0]), int(nose[1]))
    axes = (int(h) , int(w)) # (horizontal, vertical) half-axes
    angle = 0 # rotation disabled; could use getAngleImage(key_pts)
    color = (0, 177, 64) # green key color in RGB
    # thickness -1 draws a filled ellipse covering the whole face region
    img_masked=cv2.ellipse(img_masked, center , axes, angle, 0., 360, color , -1)
    return img_masked, h, w, center
def prepareImageSwap(n, e_sizeh=0, e_sizew=0):
    """Load training image `n` at a fixed 224x224 size and build its face mask.

    Args:
        n: row index into key_pts_frame.
        e_sizeh, e_sizew: optional explicit ellipse half-axes forwarded to
            faceMask (lets the second face reuse the first face's size).

    Returns:
        (image, key_pts, mask, h, w, center): the resized RGB image, its
        rescaled keypoints, a binary mask (255 inside the face ellipse),
        the ellipse half-axes, and the nose-center position.
    """
    IMAGE_SIZE = 224
    image = cv2.imread(os.path.join('data/training/', key_pts_frame.iloc[n, 0]))
    # per-axis factors mapping original keypoints onto the resized image
    scale = [IMAGE_SIZE / image.shape[1], IMAGE_SIZE / image.shape[0]]
    image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv2.INTER_CUBIC)
    key_pts = key_pts_frame.iloc[n, 1:].values.astype('float').reshape(-1, 2)
    key_pts *= scale
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # paint the green ellipse, then key on that exact color to get the mask
    image_masked, h, w, center = faceMask(image, key_pts, e_sizeh, e_sizew)
    mask = cv2.inRange(image_masked, (0,177,64), (0,177,64))
    # (the original also built an unused `face_img` here — dead code removed)
    return image, key_pts, mask, h, w, center
def swapTwoFaces(n1, n2):
    """Swap the face regions of training images n1 and n2.

    Both images are resized to 224x224; each image is translated so the
    other face's nose lines up with its own nose before the masked face
    pixels are copied across.

    Returns:
        (image1 with face 2 pasted in, image2 with face 1 pasted in).
    """
    image1, key_pts1, mask1, h, w, center1 = prepareImageSwap(n1)
    # reuse face 1's ellipse size so the two masks match in extent
    image2, key_pts2, mask2, _, _, center2 = prepareImageSwap(n2, h, w)
    # nose-to-nose offset between the two images
    diffx, diffy = center1[0] - center2[0], center1[1] - center2[1]
    # zero strips used to shift an image without wrapping
    colx, coly = np.zeros((abs(diffx),image2.shape[1], 3)), np.zeros((image2.shape[0],abs(diffy), 3))
    # shift image2 by (diffx, diffy) so its face lands on face 1's position
    # (when diffx == 0 the else branch is a no-op: image2[-0:] is the whole image)
    if diffx > 0: face_img2_moved = np.vstack([colx, image2[:-diffx,:,:]])
    else: face_img2_moved = np.vstack([image2[-diffx:,:,:], colx])
    if diffy > 0: face_img2_moved = np.hstack([coly, face_img2_moved[:,:-diffy,:]])
    else: face_img2_moved = np.hstack([face_img2_moved[:,-diffy:,:], coly])
    # invert the offset and shift image1 toward face 2's position
    diffx *= -1; diffy *= -1
    if diffx > 0: face_img1_moved = np.vstack([colx, image1[:-diffx,:,:]])
    else: face_img1_moved = np.vstack([image1[-diffx:,:,:], colx])
    if diffy > 0: face_img1_moved = np.hstack([coly, face_img1_moved[:,:-diffy,:]])
    else: face_img1_moved = np.hstack([face_img1_moved[:,-diffy:,:], coly])
    # pixel coordinates inside each face ellipse
    ind1 = np.argwhere(mask1 == 255)
    ind2 = np.argwhere(mask2 == 255)
    image_masked1 = np.copy(image1)
    image_masked2 = np.copy(image2)
    # copy the shifted other-face pixels into each masked region, per channel
    for i in range(3):
        image_masked1[ind1[:,0], ind1[:,1] ,i] = face_img2_moved[ind1[:,0],ind1[:,1],i]
        image_masked2[ind2[:,0], ind2[:,1] ,i] = face_img1_moved[ind2[:,0],ind2[:,1],i]
    return image_masked1, image_masked2
def _show_swap(n1, n2):
    """Swap faces between training images n1 and n2 and display both results."""
    img1, img2 = swapTwoFaces(n1, n2)
    fig = plt.figure(figsize=(15, 15))
    plt.subplot(241)
    plt.imshow((img1).astype(np.uint8))
    plt.subplot(242)
    plt.imshow((img2).astype(np.uint8))

# the four example cells were identical apart from the indices — deduplicated
_show_swap(1615, 2488)
_show_swap(25, 248)
_show_swap(211, 338)
_show_swap(532, 849)
def getImagePose(n):
    """Estimate the head pose of training image `n` via PnP and draw it.

    Solves for the 3D head rotation/translation from six 2D landmarks and
    a generic 3D face model, then draws the landmark points and a line
    sticking out of the nose showing where the head points.

    Returns:
        (original image, annotated image, rotation_vector,
         translation_vector, p1, p2) where p1 is the 2D nose tip and p2
        the projected end of the nose-direction line.
    """
    image_name = key_pts_frame.iloc[n, 0]
    image_ori = mpimg.imread(os.path.join('data/training/', image_name))
    key_pts = key_pts_frame.iloc[n, 1:].values.astype('float').reshape(-1, 2)
    image = np.copy(image_ori)
    # Based on Tutorial
    # https://learnopencv.com/head-pose-estimation-using-opencv-and-dlib/
    # the six 2D landmarks matching the generic 3D model points below
    NOSE_TIP = key_pts[29,:]
    CHIN = key_pts[8,:]
    LEFT_EYE = key_pts[36,:]
    RIGHT_EYE = key_pts[45,:]
    LEFT_MOUTH = key_pts[48,:]
    RIGHT_MOUTH = key_pts[54,:]
    size = image.shape
    image_points = np.vstack([NOSE_TIP, CHIN, LEFT_EYE, RIGHT_EYE, LEFT_MOUTH, RIGHT_MOUTH])
    # Camera internals - unchanged from Tutorial
    # (focal length approximated by image width, principal point at center)
    focal_length = size[1]
    center = (size[1] // 2, size[0] // 2)
    camera_matrix = np.array(
        [[focal_length, 0, center[0]],
         [0, focal_length, center[1]],
         [0, 0, 1]], dtype = "double"
    )
    # 3D model points. - unchanged from Tutorial
    model_points = np.array([
        (0.0, 0.0, 0.0),          # Nose tip
        (0.0, -330.0, -65.0),     # Chin
        (-225.0, 170.0, -135.0),  # Left eye left corner
        (225.0, 170.0, -135.0),   # Right eye right corner
        (-150.0, -150.0, -125.0), # Left Mouth corner
        (150.0, -150.0, -125.0)   # Right mouth corner
    ])
    dist_coeffs = np.zeros((4,1)) # Assuming no lens distortion
    (success, rotation_vector, translation_vector) = cv2.solvePnP(model_points, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
    # Project a 3D point (0, 0, 1000.0) onto the image plane.
    # We use this to draw a line sticking out of the nose
    (nose_end_point2D, jacobian) = cv2.projectPoints(np.array([(0.0, 0.0, 1000.0)]), rotation_vector, translation_vector, camera_matrix, dist_coeffs)
    # draw the six landmark points
    for p in image_points:
        cv2.circle(image, (int(p[0]), int(p[1])), 3, (0,0,255), -1)
    # p1 = nose tip, p2 = projected end of the nose-direction line
    p1 = ( int(image_points[0][0]), int(image_points[0][1]))
    p2 = ( int(nose_end_point2D[0][0][0]), int(nose_end_point2D[0][0][1]))
    cv2.line(image, p1, p2, (255,0,0), 2)
    return image_ori, image, rotation_vector, translation_vector, p1, p2
threshold = 9  # pixels of nose-line deflection still treated as "central"
fig = plt.figure(figsize=(15, 15))
# sample 20 random training images and label each with its estimated pose
for n in range(20):
    i = np.random.randint(0, key_pts_frame.shape[0])
    image_ori, image, rotation_vector, translation_vector, pNose, pObs = getImagePose(i)
    # direction of the projected nose line decides the head orientation
    diffx, diffy = pObs[0] - pNose[0], pObs[1] - pNose[1]
    pos = 'Looking '
    if -threshold < diffx < threshold: pos += 'Central '
    elif diffx < 0: pos += 'Left '
    else: pos += 'Right '
    if -threshold < diffy < threshold: pos += 'to center image'
    elif diffy < 0: pos += 'Upwards'
    else: pos += 'Downwards'
    plt.subplot(4,5,n +1)
    plt.title(pos, color='red')
    plt.imshow(image)
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import matplotlib.image as mpimg
import os
import numpy as np
import matplotlib.pyplot as plt
class FacialKeypointsDataset(Dataset):
    """Dataset of face images paired with their facial-keypoint annotations."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.key_pts_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of annotated images."""
        return len(self.key_pts_frame)

    def __getitem__(self, idx):
        """Load image `idx` and return {'image': ..., 'keypoints': ...}."""
        image_path = os.path.join(self.root_dir,
                                  self.key_pts_frame.iloc[idx, 0])
        image = mpimg.imread(image_path)
        # drop the alpha channel if the file has one
        if image.shape[2] == 4:
            image = image[:, :, :3]
        key_pts = self.key_pts_frame.iloc[idx, 1:].values.astype('float').reshape(-1, 2)
        sample = {'image': image, 'keypoints': key_pts}
        if self.transform:
            sample = self.transform(sample)
        return sample
import torch
from torchvision import transforms, utils
# tranforms
class Normalize(object):
"""Convert a color image to grayscale and normalize the color range to [0,1]."""
def __call__(self, sample):
image, key_pts = sample['image'], sample['keypoints']
image_copy = np.copy(image)
key_pts_copy = np.copy(key_pts)
# convert image to grayscale
image_copy = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# scale color range from [0, 255] to [0, 1]
image_copy= image_copy/255.0
# scale keypoints to be centered around 0 with a range of [-1, 1]
# mean = 100, sqrt = 50, so, pts should be (pts - 100)/50
key_pts_copy = (key_pts_copy - 100)/50.0
return {'image': image_copy, 'keypoints': key_pts_copy}
class Rescale(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, the smaller image edge is matched
            to output_size, keeping the aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        """Resize the sample's image and scale its keypoints to match."""
        image, key_pts = sample['image'], sample['keypoints']
        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            # match the smaller edge, preserving aspect ratio
            short = self.output_size
            if h > w:
                new_h, new_w = short * h / w, short
            else:
                new_h, new_w = short, short * w / h
        else:
            new_h, new_w = self.output_size
        new_h, new_w = int(new_h), int(new_w)
        resized = cv2.resize(image, (new_w, new_h))
        # keypoints scale by the same per-axis factors as the image
        scaled_pts = key_pts * [new_w / w, new_h / h]
        return {'image': resized, 'keypoints': scaled_pts}
class RandomCrop(object):
    """Crop randomly the image in a sample.

    Args:
        output_size (tuple or int): Desired output size. If int, a square
            crop is made.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        """Return a randomly cropped copy of the sample; keypoints are shifted
        into the crop's coordinate frame (some may fall outside the crop)."""
        image, key_pts = sample['image'], sample['keypoints']
        h, w = image.shape[:2]
        new_h, new_w = self.output_size
        # FIX: np.random.randint(0, 0) raises ValueError, so the original
        # crashed when the image was already exactly the target size
        top = 0 if h == new_h else np.random.randint(0, h - new_h)
        left = 0 if w == new_w else np.random.randint(0, w - new_w)
        image = image[top: top + new_h,
                      left: left + new_w]
        key_pts = key_pts - [left, top]
        return {'image': image, 'keypoints': key_pts}
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        """Return the sample with the image as a C x H x W torch tensor."""
        image, key_pts = sample['image'], sample['keypoints']
        # grayscale images get an explicit single channel dimension
        if image.ndim == 2:
            image = image[:, :, None]
        # numpy images are H x W x C; torch expects C x H x W
        chw = np.transpose(image, (2, 0, 1))
        return {'image': torch.from_numpy(chw),
                'keypoints': torch.from_numpy(key_pts)}
from math import pi, cos, sin
class Rotate(object):
    """Rotate the sample's image clockwise and rotate its keypoints to match.

    Args:
        angle (int): Desired angle in degrees to rotate clockwise (0-360).
    """
    def __init__(self, angle):
        assert isinstance(angle, int), "angle not int"
        assert 0 <= angle <= 360, "angle needs to be between 0 - 360 (inclusive)"
        self.rotation_angle = angle
    def __call__(self, sample):
        """Return {'image', 'keypoints'} rotated about the image center."""
        angle = self.rotation_angle
        image, key_pts = sample['image'], sample['keypoints']
        # (width, height); rotation pivot is the image center
        w_h = np.array(image.shape[1::-1])
        image_center = tuple(w_h / 2)
        # cv2's positive angle is counter-clockwise, so pass -angle for clockwise
        rot_mat = cv2.getRotationMatrix2D(image_center, -angle, 1.0)
        image = cv2.warpAffine(image, rot_mat, tuple(w_h), flags=cv2.INTER_LINEAR)
        # rotate keypoints with the standard 2D rotation about the center;
        # with +angle and image y pointing down this appears clockwise,
        # matching the image rotation above
        x, y = key_pts[:,0], key_pts[:,1]
        x_rotated = (x - image_center[0]) * cos(angle * pi / 180) - (y - image_center[1]) * sin(angle * pi / 180) + image_center[0]
        y_rotated = (x - image_center[0]) * sin(angle * pi / 180) + (y - image_center[1]) * cos(angle * pi / 180) + image_center[1]
        key_pts = np.stack([x_rotated, y_rotated], axis=1)
        return {'image': image, 'keypoints': key_pts}
def show_keypoints(image, key_pts):
    """Show image with keypoints overlaid as magenta dots."""
    plt.imshow(image)
    xs, ys = key_pts[:, 0], key_pts[:, 1]
    plt.scatter(xs, ys, s=20, marker='.', c='m')
# test out some of these transforms
rescale = Rescale(224)
crop = RandomCrop(10)
rotate = Rotate(25)
composed = transforms.Compose([Rescale(250),
                               RandomCrop(224)])
# apply the transforms to a sample image
test_num = 530
face_dataset = FacialKeypointsDataset(csv_file='data/training_frames_keypoints.csv',
                                      root_dir='data/training/')
sample = face_dataset[test_num]
plt.figure(figsize=(15,15))
# show each transform's result side by side, titled by its class name
for i, tx in enumerate([rescale, crop, composed, rotate]):
    transformed_sample = tx(sample)
    ax = plt.subplot(1, 4, i + 1)
    plt.tight_layout()
    ax.set_title(type(tx).__name__, color='orange')
    show_keypoints(transformed_sample['image'], transformed_sample['keypoints'])
plt.show()
def show_keypoints(image, key_pts):
    """Show image with keypoints overlaid as magenta dots."""
    plt.imshow(image)
    plt.scatter(key_pts[:, 0], key_pts[:, 1], s=20, marker='.', c='m')
# test the Rotate transform on a random sample
test_num = np.random.randint(0, len(face_dataset))
rotate = Rotate(25)
sample = face_dataset[test_num]
plt.figure(figsize=(15,15))
transformed_sample = rotate(sample)
# FIX: the original reused `i` and `tx` left over from the previous cell's
# loop (NameError when run standalone); use one subplot and title this
# transform explicitly
ax = plt.subplot(1, 1, 1)
plt.tight_layout()
ax.set_title(type(rotate).__name__, color='orange')
show_keypoints(transformed_sample['image'], transformed_sample['keypoints'])
plt.show()